# List of required packages
required_packages <- c(
  "tidyverse",   # Data manipulation and visualization
  "nnet",        # Multinomial logistic regression
  "knitr",       # Table formatting
  "kableExtra",  # Enhanced table formatting
  "patchwork",   # Plot combining
  "broom"        # Tidy model outputs
)

# Identify and install any missing packages
new_packages <- required_packages[!(required_packages %in% installed.packages()[, "Package"])]
if (length(new_packages)) install.packages(new_packages)

# Load all required packages
lapply(required_packages, library, character.only = TRUE)

# Create output directories (same as in 01_)
dir.create("outputs", showWarnings = FALSE)
dir.create("outputs/tables", showWarnings = FALSE)
dir.create("outputs/figures", showWarnings = FALSE)

# Define party order (Abstainer as reference, but will not be plotted)
party_order <- c("Abstainer/null", "Podemos", "Sumar", "PSOE", "Other", "PP", "SALF", "VOX")
plot_parties <- c("Podemos", "Sumar", "PSOE", "Other", "PP", "SALF", "VOX")  # Parties to show in plots

# Define party colors (same as in 01_)
party_colors <- c(
  "Podemos" = "#984EA3",        # Purple
  "Sumar" = "#FF69B4",          # Hot Pink
  "PSOE" = "#E41A1C",           # Red
  "VOX" = "#4DAF4A",            # Green
  "PP" = "#377EB8",             # Blue
  "Other" = "#808080",
  "SALF" = "#A6761D"            # Brown
)

# Define survey labels for more readable names
survey_labels <- c(
  "andpol" = "Original survey", 
  "cis_bjul" = "CIS July", 
  "cis_bjun" = "CIS June", 
  "db40_july" = "40dB July", 
  "cis_campaign" = "CIS EP Campaign", 
  "cis_pos" = "CIS EP Post-electoral", 
  "cis_pre" = "CIS EP Pre-electoral", 
  "db40_june" = "40dB June", 
  "gesop" = "GESOP", 
  "ees" = "EES", 
  "db40_august" = "40dB August",
  "Overall" = "Overall"
)

# Load and preprocess data (similar to 01_, but ensure we use the same data preprocessing)
data <- read_csv("merged_survey_data.csv") %>%
  filter(party_pref %in% party_order) %>%
  mutate(
    lr = if_else(survey == "gesop", lr * 2, lr),
    gender = if_else(gender %in% c(NA, "Other"), NA_character_, gender),
    gender = factor(gender, levels = c("Female", "Male")),
    # Create binary education variable (tertiary or less)
    edu_binary = case_when(
      edu_fct == "Tertiary studies" ~ "Tertiary",
      !is.na(edu_fct) ~ "Non-tertiary",
      TRUE ~ NA_character_
    ),
    edu_binary = factor(edu_binary, levels = c("Non-tertiary", "Tertiary")),
    # Set Abstainer as reference level
    party_pref = factor(party_pref, levels = party_order),
    # Ensure survey is a factor for the combined model
    survey = factor(survey)
  ) %>%
  drop_na(gender, age_group, edu_binary, party_pref, lr)  # Changed from age to age_group

#==========================================================================================
# REGRESSION MODELS AND AME CALCULATION
#==========================================================================================

# Function to extract model fit statistics
get_model_fit <- function(model, data) {
  # Log-likelihood
  ll <- logLik(model)[1]
  
  # AIC
  aic_value <- AIC(model)
  
  # BIC
  bic_value <- BIC(model)
  
  # Calculate McFadden's R² (same as in sociodemographic script)
  # First, the null model (intercept only)
  null_model <- tryCatch({
    multinom(party_pref ~ 1, data = data, trace = FALSE)
  }, error = function(e) {
    return(NULL)
  })
  
  mcfadden_r2 <- if(!is.null(null_model)) {
    ll_null <- logLik(null_model)[1]
    1 - (ll / ll_null)
  } else {
    NA
  }
  
  # Number of observations
  n_obs <- nrow(data)
  
  # Degrees of freedom
  df <- attr(logLik(model), "df")
  
  # Return as a named list
  list(
    logLik = ll,
    AIC = aic_value,
    BIC = bic_value,
    McFadden_R2 = mcfadden_r2,
    n_obs = n_obs,
    df = df
  )
}

# Function to run multinomial models for party preference with LR as predictor
get_lr_model_results <- function(survey_name) {
  # Filter data for this survey
  survey_data <- data %>% filter(survey == survey_name)
  
  # Check data sufficiency
  if(nrow(survey_data) < 100) {
    message("Insufficient data for survey ", survey_name, " (n=", nrow(survey_data), ")")
    return(NULL)
  }
  
  party_counts <- table(survey_data$party_pref)
  if(sum(party_counts >= 10) < 3) {
    message("Insufficient party variation for survey ", survey_name)
    return(NULL)
  }
  
  # Report rare categories
  rare_parties <- names(party_counts[party_counts < 20])
  if(length(rare_parties) > 0) {
    message("Warning: Rare party categories in survey ", survey_name, ": ", 
            paste(rare_parties, collapse=", "))
  }
  
  # Fit multinomial model with party_pref as DV and lr as IV
  model <- tryCatch({
    multinom(party_pref ~ lr + gender + age_group + edu_binary, data = survey_data, trace = FALSE, maxit = 500)  # Changed from age to age_group
  }, error = function(e) {
    message("Error in model for survey ", survey_name, ": ", e$message)
    return(NULL)
  })
  
  if(is.null(model)) return(NULL)
  
  # Extract model results using manual approach
  coefs <- summary(model)$coefficients
  ses <- summary(model)$standard.errors
  z_values <- coefs / ses
  p_values <- 2 * (1 - pnorm(abs(z_values)))
  
  # Create a more structured coefficient data frame
  coef_data <- data.frame()
  for(i in 1:nrow(coefs)) {
    party <- rownames(coefs)[i]
    for(j in 1:ncol(coefs)) {
      term <- colnames(coefs)[j]
      estimate <- coefs[i, j]
      std.error <- ses[i, j]
      p.value <- p_values[i, j]
      
      # Add significance stars
      stars <- case_when(
        p.value < 0.001 ~ "***",
        p.value < 0.01 ~ "**",
        p.value < 0.05 ~ "*",
        p.value < 0.1 ~ ".",
        TRUE ~ ""
      )
      
      # Format coefficient with standard error and stars
      coef_with_se <- sprintf("%.3f%s\n(%.3f)", estimate, stars, std.error)
      
      coef_data <- rbind(coef_data, data.frame(
        party = party,
        term = term,
        estimate = estimate,
        std.error = std.error,
        p.value = p.value,
        stars = stars,
        coef_with_se = coef_with_se,
        stringsAsFactors = FALSE
      ))
    }
  }
  
  # Extract just the log-odds for LR
  lr_log_odds <- coef_data %>%
    filter(term == "lr") %>%
    mutate(survey = survey_name) %>%
    select(survey, party, estimate, std.error, stars)
  
  # Calculate AMEs for left-right position
  ames <- tryCatch({
    # Create counterfactual datasets for different LR values
    # Get range of LR to create reasonable steps
    lr_min <- min(survey_data$lr, na.rm = TRUE)
    lr_max <- max(survey_data$lr, na.rm = TRUE)
    
    # Create a single unit change in LR (from average to average+1)
    base_lr <- mean(survey_data$lr, na.rm = TRUE)
    plus_one_lr <- base_lr + 1
    
    # Create prediction datasets
    base_data <- survey_data %>% mutate(lr = base_lr)
    plus_one_data <- survey_data %>% mutate(lr = plus_one_lr)
    
    # Calculate predicted probabilities
    base_pred <- predict(model, newdata = base_data, type = "probs")
    plus_one_pred <- predict(model, newdata = plus_one_data, type = "probs")
    
    # Calculate differences in probabilities
    lr_diff <- plus_one_pred - base_pred
    
    # Create AME dataframe
    tibble(
      party = colnames(lr_diff),
      predictor = "Left-Right",
      variable = "Left-Right Position (+1 unit)",
      AME = colMeans(lr_diff),
      SE = apply(lr_diff, 2, sd) / sqrt(nrow(lr_diff))
    ) %>%
      # Add significance stars
      mutate(
        z_value = AME / SE,
        p_value = 2 * (1 - pnorm(abs(z_value))),
        stars = case_when(
          p_value < 0.001 ~ "***",
          p_value < 0.01 ~ "**",
          p_value < 0.05 ~ "*",
          p_value < 0.1 ~ ".",
          TRUE ~ ""
        ),
        ame_with_se = sprintf("%.3f%s\n(%.3f)", AME, stars, SE)
      )
  }, error = function(e) {
    message("Error calculating AMEs for survey ", survey_name, ": ", e$message)
    return(NULL)
  })
  
  # Calculate model fit statistics
  model_fit <- get_model_fit(model, survey_data)
  
  # Return all results
  list(
    survey = survey_name,
    coefficients = coef_data,
    lr_log_odds = lr_log_odds,  # Added log-odds for LR
    ames = ames,
    n = nrow(survey_data),
    model = model,
    model_fit = model_fit
  )
}

# NEW: Function to run a combined model with all data, using survey as control
get_combined_model_results <- function(data) {
  message("Running combined model with survey as control...")
  
  # Fit multinomial model with survey as control
  model <- tryCatch({
    multinom(party_pref ~ lr + gender + age_group + edu_binary + survey,  # Changed from age to age_group
             data = data, trace = FALSE, maxit = 1000)
  }, error = function(e) {
    message("Error in combined model: ", e$message)
    return(NULL)
  })
  
  if(is.null(model)) return(NULL)
  
  # Extract model results using manual approach
  coefs <- summary(model)$coefficients
  ses <- summary(model)$standard.errors
  z_values <- coefs / ses
  p_values <- 2 * (1 - pnorm(abs(z_values)))
  
  # Create a structured coefficient data frame
  coef_data <- data.frame()
  for(i in 1:nrow(coefs)) {
    party <- rownames(coefs)[i]
    for(j in 1:ncol(coefs)) {
      term <- colnames(coefs)[j]
      estimate <- coefs[i, j]
      std.error <- ses[i, j]
      p.value <- p_values[i, j]
      
      # Add significance stars
      stars <- case_when(
        p.value < 0.001 ~ "***",
        p.value < 0.01 ~ "**",
        p.value < 0.05 ~ "*",
        p.value < 0.1 ~ ".",
        TRUE ~ ""
      )
      
      # Format coefficient with standard error and stars
      coef_with_se <- sprintf("%.3f%s\n(%.3f)", estimate, stars, std.error)
      
      coef_data <- rbind(coef_data, data.frame(
        party = party,
        term = term,
        estimate = estimate,
        std.error = std.error,
        p.value = p.value,
        stars = stars,
        coef_with_se = coef_with_se,
        stringsAsFactors = FALSE
      ))
    }
  }
  
  # Extract just the log-odds for LR
  lr_log_odds <- coef_data %>%
    filter(term == "lr") %>%
    mutate(survey = "Overall") %>%
    select(survey, party, estimate, std.error, stars)
  
  # Calculate AMEs for left-right position
  ames <- tryCatch({
    # Create counterfactual datasets for different LR values
    base_lr <- mean(data$lr, na.rm = TRUE)
    plus_one_lr <- base_lr + 1
    
    # Create prediction datasets
    base_data <- data %>% mutate(lr = base_lr)
    plus_one_data <- data %>% mutate(lr = plus_one_lr)
    
    # Calculate predicted probabilities
    base_pred <- predict(model, newdata = base_data, type = "probs")
    plus_one_pred <- predict(model, newdata = plus_one_data, type = "probs")
    
    # Calculate differences in probabilities
    lr_diff <- plus_one_pred - base_pred
    
    # Create AME dataframe
    tibble(
      party = colnames(lr_diff),
      predictor = "Left-Right",
      variable = "Left-Right Position (+1 unit)",
      AME = colMeans(lr_diff),
      SE = apply(lr_diff, 2, sd) / sqrt(nrow(lr_diff))
    ) %>%
      # Add significance stars
      mutate(
        z_value = AME / SE,
        p_value = 2 * (1 - pnorm(abs(z_value))),
        stars = case_when(
          p_value < 0.001 ~ "***",
          p_value < 0.01 ~ "**",
          p_value < 0.05 ~ "*",
          p_value < 0.1 ~ ".",
          TRUE ~ ""
        ),
        ame_with_se = sprintf("%.3f%s\n(%.3f)", AME, stars, SE)
      )
  }, error = function(e) {
    message("Error calculating AMEs for combined model: ", e$message)
    return(NULL)
  })
  
  # Calculate model fit statistics
  model_fit <- get_model_fit(model, data)
  
  # Return all results
  list(
    survey = "Overall",  # Label as "Overall" instead of "Mean"
    coefficients = coef_data,
    lr_log_odds = lr_log_odds,  # Added log-odds for LR
    ames = ames,
    n = nrow(data),
    model = model,
    model_fit = model_fit
  )
}

# Get results for each survey
unique_surveys <- unique(data$survey)
survey_results <- map(unique_surveys, get_lr_model_results)
names(survey_results) <- unique_surveys

# Remove NULL results
valid_results <- Filter(Negate(is.null), survey_results)

# Run the combined model and add to valid_results
combined_result <- get_combined_model_results(data)
if(!is.null(combined_result)) {
  valid_results[["overall"]] <- combined_result
  message("Combined model added to results successfully")
} else {
  message("WARNING: Combined model failed to run")
}

#==========================================================================================
# FIGURE GENERATION - AMEs
#==========================================================================================

# Prepare data for plots
plot_ames <- map_dfr(valid_results, function(result) {
  if(is.null(result$ames)) return(NULL)
  
  result$ames %>%
    filter(party %in% plot_parties) %>%
    mutate(
      survey = result$survey,
      sample_size = result$n
    )
})

# Calculate survey sample sizes per party
survey_sizes <- data %>%
  count(survey, party_pref) %>%
  rename(party = party_pref, party_sample_size = n) %>%
  filter(party %in% plot_parties)

# Combine AMEs with sample sizes
plot_data <- plot_ames %>%
  left_join(survey_sizes, by = c("survey", "party"))

# Get overall AMEs for plotting (from combined model)
overall_ames <- plot_data %>%
  filter(survey == "Overall") %>%
  mutate(
    lower = AME - 1.96 * SE,
    upper = AME + 1.96 * SE,
    predictor_value = paste(predictor, variable, sep = "_"),
    total_sample = sample_size  # Use combined model sample size
  )

# Ensure parties appear in the original party_order
plot_data$party <- factor(plot_data$party, levels = plot_parties)
overall_ames$party <- factor(overall_ames$party, levels = plot_parties)

# Define survey shapes for plotting
survey_shapes <- c(
  "andpol" = 1, "cis_bjul" = 2, "cis_bjun" = 0, "db40_july" = 5, 
  "cis_campaign" = 6, "cis_pos" = 3, "cis_pre" = 4, "db40_june" = 7,
  "gesop" = 8, "ees" = 9, "db40_august" = 10
)

# Create LR effect plot
lr_ame_plot <- ggplot() +
  # Overall AMEs with error bars
  geom_point(data = overall_ames, 
             aes(x = party, y = AME, color = party, size = total_sample),
             alpha = 0.7) +
  geom_linerange(data = overall_ames, 
                 aes(x = party, ymin = lower, ymax = upper, color = party),
                 size = 1, alpha = 0.7) +
  # Survey-specific AMEs
  geom_point(data = plot_data %>% filter(survey != "Overall"), 
             aes(x = party, y = AME, color = party, size = party_sample_size, shape = survey),
             position = position_jitter(width = 0.2, height = 0), alpha = 0.5) +
  # Value labels
  geom_text(data = overall_ames, 
            aes(x = party, y = AME, label = sprintf("%.3f", AME)),
            color = "black", vjust = -1, size = 3) +
  # Reference line at 0 (no effect)
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  # Styling
  scale_color_manual(values = party_colors, guide = "none") +
  scale_size_continuous(name = "Sample size (N)", 
                        breaks = c(50, 100, 500, 1000, 5000),
                        labels = scales::comma(c(50, 100, 500, 1000, 5000)),
                        range = c(1, 8)) +
  scale_shape_manual(name = "Survey", values = survey_shapes, labels = survey_labels) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    panel.grid.minor = element_blank(),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  coord_flip() +
  labs(x = "", y = "Average Marginal Effect of Left-Right Position\n(+1 unit change) on Party Preference")

# Save the AME plot
ggsave(file.path("outputs/figures", "figure2_lr_effects.png"), 
       lr_ame_plot, width = 10, height = 6, bg = "white")

message("LR effect plot created successfully")

#==========================================================================================
# FIGURE GENERATION - LOG-ODDS WITH OUTLIER HANDLING
#==========================================================================================

# Extract log-odds for LR from each survey
plot_logodds <- map_dfr(valid_results, function(result) {
  if(is.null(result$lr_log_odds)) return(NULL)
  
  result$lr_log_odds %>%
    filter(party %in% plot_parties) %>%
    mutate(
      sample_size = result$n,
      lower = estimate - 1.96 * std.error,
      upper = estimate + 1.96 * std.error
    )
})

# Identify extreme outliers
outliers <- plot_logodds %>%
  group_by(party) %>%
  mutate(
    median_est = median(estimate, na.rm = TRUE),
    mad = mad(estimate, na.rm = TRUE),
    is_extreme = abs(estimate - median_est) > 5 * mad
  ) %>%
  filter(is_extreme)

if(nrow(outliers) > 0) {
  message("Identified ", nrow(outliers), " extreme outliers in log-odds data")
  print(outliers %>% select(survey, party, estimate))
  
  # Filter out extreme outliers from plot data
  plot_logodds_clean <- plot_logodds %>%
    anti_join(outliers %>% select(survey, party), by = c("survey", "party"))
} else {
  plot_logodds_clean <- plot_logodds
}

# Combine log-odds with sample sizes
plot_logodds_clean <- plot_logodds_clean %>%
  left_join(survey_sizes, by = c("survey", "party"))

# Get overall log-odds for plotting
overall_logodds <- plot_logodds %>%
  filter(survey == "Overall")

# Ensure parties appear in the original party_order
plot_logodds_clean$party <- factor(plot_logodds_clean$party, levels = plot_parties)
overall_logodds$party <- factor(overall_logodds$party, levels = plot_parties)

# Create LR log-odds plot with outliers removed
lr_logodds_plot <- ggplot() +
  # Overall log-odds with error bars
  geom_point(data = overall_logodds, 
             aes(x = party, y = estimate, color = party, size = sample_size),
             alpha = 0.7) +
  geom_linerange(data = overall_logodds, 
                 aes(x = party, ymin = lower, ymax = upper, color = party),
                 size = 1, alpha = 0.7) +
  # Survey-specific log-odds (with outliers removed)
  geom_point(data = plot_logodds_clean %>% filter(survey != "Overall"), 
             aes(x = party, y = estimate, color = party, size = party_sample_size, shape = survey),
             position = position_jitter(width = 0.2, height = 0), alpha = 0.5) +
  # Value labels
  geom_text(data = overall_logodds, 
            aes(x = party, y = estimate, label = sprintf("%.3f", estimate)),
            color = "black", vjust = -1, size = 3) +
  # Reference line at 0 (no effect)
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  # Styling
  scale_color_manual(values = party_colors, guide = "none") +
  scale_size_continuous(name = "Sample size (N)", 
                        breaks = c(50, 100, 500, 1000, 5000),
                        labels = scales::comma(c(50, 100, 500, 1000, 5000)),
                        range = c(1, 8)) +
  scale_shape_manual(name = "Survey", values = survey_shapes, labels = survey_labels) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    panel.grid.minor = element_blank(),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  coord_flip() +
  labs(title= "Left-Right Position (normalized)", x = "", 
       y = "Log-Odds Coefficient",
       caption = if(nrow(outliers) > 0) "Note: Extreme outliers removed for better visualization" else "")

# Save the log-odds plot
ggsave(file.path("outputs/figures", "figure2b_lr_logodds.png"), 
       lr_logodds_plot, width = 10, height = 6, bg = "white")

message("LR log-odds plot created successfully")

# Create a combined figure with both AME and log-odds plots
combined_plots <- lr_ame_plot / lr_logodds_plot +
  plot_layout(heights = c(1, 1)) +
  plot_annotation(
    title = "Effects of Left-Right Position on Party Preference",
    subtitle = "Average Marginal Effects (top) and Log-Odds Coefficients (bottom)",
    theme = theme(plot.title = element_text(size = 16, hjust = 0.5),
                  plot.subtitle = element_text(size = 12, hjust = 0.5))
  )

# Save the combined plot
ggsave(file.path("outputs/figures", "figure2c_lr_combined.png"), 
       combined_plots, width = 12, height = 12, bg = "white")

message("Combined LR effects plot created successfully")

#==========================================================================================
# TABLE GENERATION - AMEs
#==========================================================================================

# Create a comprehensive table with LR effects for all parties and surveys
comprehensive_table <- plot_data %>%
  filter(survey != "Overall") %>%  # Exclude Overall from the survey-specific part
  select(survey, party, predictor, variable, AME, SE, stars) %>%
  # Create a formatted AME column with stars and standard errors
  mutate(formatted_ame = sprintf("%.3f%s\n(%.3f)", AME, stars, SE)) %>%
  # Reshape to wide format with parties as columns
  pivot_wider(
    id_cols = c(survey, predictor, variable),
    names_from = party,
    values_from = formatted_ame
  ) %>%
  # Sort by predictor and survey
  arrange(predictor, variable, survey)

# Add Overall results from combined model
overall_table <- plot_data %>%
  filter(survey == "Overall") %>%
  select(survey, party, predictor, variable, AME, SE, stars) %>%
  # Format AMEs with stars and SEs
  mutate(formatted_ame = sprintf("%.3f%s\n(%.3f)", AME, stars, SE)) %>%
  # Reshape to wide format
  pivot_wider(
    id_cols = c(survey, predictor, variable),
    names_from = party,
    values_from = formatted_ame
  )

# Combine and create the final table
final_table <- bind_rows(comprehensive_table, overall_table) %>%
  # Ensure consistent column order
  select(survey, predictor, variable, all_of(plot_parties)) %>%
  # Sort properly
  arrange(predictor, variable, 
          case_when(
            survey == "Overall" ~ "ZZZ", # Make Overall appear last
            TRUE ~ survey
          )) %>%
  # Replace survey codes with readable labels
  mutate(survey = ifelse(survey %in% names(survey_labels), 
                         survey_labels[survey], 
                         as.character(survey)))

# Format as LaTeX table
latex_table <- kable(final_table, format = "latex", booktabs = TRUE, 
                     caption = "Table 2: Average Marginal Effects of Left-Right Position on Party Preference",
                     label = "tab:table2_lr",
                     escape = FALSE) %>%
  kable_styling(latex_options = c("striped", "scale_down", "hold_position")) %>%
  column_spec(1:3, bold = FALSE) %>%
  add_header_above(c(" " = 3, "Party (Reference: Abstainer/null)" = length(plot_parties))) %>%
  footnote(
    general = "Cell entries show Average Marginal Effects of a one-unit increase in Left-Right position on the probability of preferring each party (vs. Abstainer/null). Standard errors in parentheses. The 'Overall' row shows AMEs from a combined model with survey as a control variable.",
    symbol = c("p < 0.1", "p < 0.05", "p < 0.01", "p < 0.001"),
    symbol_manual = c(".", "*", "**", "***"),
    threeparttable = TRUE,
    escape = FALSE
  )

# Write to file
writeLines(latex_table, file.path("outputs/tables", "table2_lr_effects.tex"))

#==========================================================================================
# TABLE GENERATION - LOG-ODDS
#==========================================================================================

# Create a comprehensive table with LR log-odds for all parties and surveys
# Use the clean data (without outliers) for a more representative table
logodds_table <- plot_logodds_clean %>%
  filter(survey != "Overall") %>%  # Exclude Overall from the survey-specific part
  # Create a formatted log-odds column with stars and standard errors
  mutate(formatted_coef = sprintf("%.3f%s\n(%.3f)", estimate, stars, std.error)) %>%
  # Reshape to wide format with parties as columns
  pivot_wider(
    id_cols = c(survey),
    names_from = party,
    values_from = formatted_coef
  ) %>%
  # Sort by survey
  arrange(survey)

# Add Overall results from combined model
overall_logodds_table <- plot_logodds %>%
  filter(survey == "Overall") %>%
  # Format coefficients with stars and SEs
  mutate(formatted_coef = sprintf("%.3f%s\n(%.3f)", estimate, stars, std.error)) %>%
  # Reshape to wide format
  pivot_wider(
    id_cols = c(survey),
    names_from = party,
    values_from = formatted_coef
  )

# Combine and create the final log-odds table
final_logodds_table <- bind_rows(logodds_table, overall_logodds_table) %>%
  # Ensure consistent column order
  select(survey, all_of(plot_parties)) %>%
  # Sort properly
  arrange(case_when(
    survey == "Overall" ~ "ZZZ", # Make Overall appear last
    TRUE ~ survey
  )) %>%
  # Replace survey codes with readable labels
  mutate(survey = ifelse(survey %in% names(survey_labels), 
                         survey_labels[survey], 
                         as.character(survey)))

# If there were outliers, note them in the table footnote
outlier_note <- if(nrow(outliers) > 0) {
  outlier_info <- outliers %>%
    mutate(
      # Convert survey code to readable name if available
      survey_readable = ifelse(survey %in% names(survey_labels), 
                               survey_labels[survey], 
                               as.character(survey)),
      desc = sprintf("%s (%s: %.2f)", party, survey_readable, estimate)
    ) %>%
    pull(desc) %>%
    paste(collapse = ", ")
  paste("Extreme outliers removed from table for clarity:", outlier_info)
} else {
  ""
}

# Format as LaTeX table
latex_logodds_table <- kable(final_logodds_table, format = "latex", booktabs = TRUE, 
                             caption = "Table 2B: Log-Odds Coefficients of Left-Right Position on Party Preference",
                             label = "tab:table2B_lr_logodds",
                             escape = FALSE) %>%
  kable_styling(latex_options = c("striped", "scale_down", "hold_position")) %>%
  column_spec(1, bold = FALSE) %>%
  add_header_above(c(" " = 1, "Party (Reference: Abstainer/null)" = length(plot_parties))) %>%
  footnote(
    general = paste("Cell entries show log-odds coefficients for Left-Right position from multinomial logistic regression models predicting party preference. Standard errors in parentheses. The 'Overall' row shows coefficients from a combined model with survey as a control variable.", 
                    if(nchar(outlier_note) > 0) paste(" ", outlier_note) else ""),
    symbol = c("p < 0.1", "p < 0.05", "p < 0.01", "p < 0.001"),
    symbol_manual = c(".", "*", "**", "***"),
    threeparttable = TRUE,
    escape = FALSE
  )

# Write to file
writeLines(latex_logodds_table, file.path("outputs/tables", "table2B_lr_logodds.tex"))

#==========================================================================================
# MODEL FIT STATISTICS TABLE
#==========================================================================================

# Create a table with model fit statistics for each survey
model_fit_table <- map_dfr(valid_results, function(result) {
  tibble(
    Survey = result$survey,
    N = result$n,
    LL = sprintf("%.2f", result$model_fit$logLik),
    AIC = sprintf("%.2f", result$model_fit$AIC),
    BIC = sprintf("%.2f", result$model_fit$BIC),
    McFadden_R2 = sprintf("%.3f", result$model_fit$McFadden_R2),
    df = result$model_fit$df
  )
}) %>%
  # Replace survey codes with readable labels
  mutate(Survey = ifelse(Survey %in% names(survey_labels), 
                         survey_labels[Survey], 
                         as.character(Survey)))

# Format as LaTeX table
latex_model_fit <- kable(model_fit_table, format = "latex", booktabs = TRUE,
                         caption = "Table 2A: Model Fit Statistics for Multinomial Models by Survey",
                         label = "tab:table2A_model_fit",
                         col.names = c("Survey", "N", "Log-Likelihood", "AIC", "BIC", "McFadden R²", "df"),
                         align = c("l", "r", "r", "r", "r", "r", "r"),
                         escape = FALSE) %>%
  kable_styling(latex_options = c("striped", "hold_position")) %>%
  footnote(
    general = "All survey-specific models include left-right position, gender, age group, and education as predictors. The 'Overall' model includes the same predictors plus survey as a control variable.",
    threeparttable = TRUE
  )

# Write to file
writeLines(latex_model_fit, file.path("outputs/tables", "table2A_model_fit_statistics.tex"))

message("Analysis completed. Output files saved.")
